import pandas as pd
import plotly.express as px
# Load the Netflix titles CSV and preview the first five rows.
df = pd.read_csv("netflix_titles.csv")
df.head()
| show_id | type | title | director | cast | country | date_added | release_year | rating | duration | listed_in | description | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | s1 | Movie | Dick Johnson Is Dead | Kirsten Johnson | NaN | United States | September 25, 2021 | 2020 | PG-13 | 90 min | Documentaries | As her father nears the end of his life, filmm... |
| 1 | s2 | TV Show | Blood & Water | NaN | Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban... | South Africa | September 24, 2021 | 2021 | TV-MA | 2 Seasons | International TV Shows, TV Dramas, TV Mysteries | After crossing paths at a party, a Cape Town t... |
| 2 | s3 | TV Show | Ganglands | Julien Leclercq | Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi... | NaN | September 24, 2021 | 2021 | TV-MA | 1 Season | Crime TV Shows, International TV Shows, TV Act... | To protect his family from a powerful drug lor... |
| 3 | s4 | TV Show | Jailbirds New Orleans | NaN | NaN | NaN | September 24, 2021 | 2021 | TV-MA | 1 Season | Docuseries, Reality TV | Feuds, flirtations and toilet talk go down amo... |
| 4 | s5 | TV Show | Kota Factory | NaN | Mayur More, Jitendra Kumar, Ranjan Raj, Alam K... | India | September 24, 2021 | 2021 | TV-MA | 2 Seasons | International TV Shows, Romantic TV Shows, TV ... | In a city of coaching centers known to train I... |
# Check column dtypes: release_year is the only numeric column, and
# date_added is still a plain object column at this point.
df.dtypes
show_id object type object title object director object cast object country object date_added object release_year int64 rating object duration object listed_in object description object dtype: object
# Convert date_added to datetime so the .dt accessors used further down
# (month, day) are available.
df["date_added"] = pd.to_datetime(df["date_added"])
# Summary statistics; the output shown below covers release_year.
df.describe()
| release_year | |
|---|---|
| count | 8807.000000 |
| mean | 2014.180198 |
| std | 8.819312 |
| min | 1925.000000 |
| 25% | 2013.000000 |
| 50% | 2017.000000 |
| 75% | 2019.000000 |
| max | 2021.000000 |
# Distribution of titles by release year.
px.histogram(df, x = "release_year")
# Distribution of titles by the date they were added.
px.histogram(df, x = "date_added")
# Derive the calendar month of date_added and plot its distribution.
df["date_added_month"] = df["date_added"].dt.month
px.histogram(df, x="date_added_month")
# Derive the day of the month of date_added and plot its distribution.
df["date_added_day"] = df["date_added"].dt.day
px.histogram(df, x="date_added_day")
# Count the titles of each type (Movie / TV Show).
df["type"].value_counts()
Movie 6131 TV Show 2676 Name: type, dtype: int64
# Same day-of-month histogram, with bars coloured by title type.
px.histogram(df, x="date_added_day",color="type")
# Peek at the first ten distinct non-null country values; a single entry can
# list several comma-separated countries.
df["country"].dropna().unique()[:10]
array(['United States', 'South Africa', 'India',
'United States, Ghana, Burkina Faso, United Kingdom, Germany, Ethiopia',
'United Kingdom', 'Germany, Czech Republic', 'Mexico', 'Turkey',
'Australia', 'United States, India, France'], dtype=object)
# List the columns now present, including the derived date_added_month and
# date_added_day columns.
df.columns
Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
'release_year', 'rating', 'duration', 'listed_in', 'description',
'date_added_month', 'date_added_day'],
dtype='object')
# Show each title next to its raw country string for the first ten rows.
df[["title", "country"]].head(10)
| title | country | |
|---|---|---|
| 0 | Dick Johnson Is Dead | United States |
| 1 | Blood & Water | South Africa |
| 2 | Ganglands | NaN |
| 3 | Jailbirds New Orleans | NaN |
| 4 | Kota Factory | India |
| 5 | Midnight Mass | NaN |
| 6 | My Little Pony: A New Generation | NaN |
| 7 | Sankofa | United States, Ghana, Burkina Faso, United Kin... |
| 8 | The Great British Baking Show | United Kingdom |
| 9 | The Starling | United States |
# Split the country string on commas into one column per listed country.
# Rows with fewer countries are padded with None; rows with no country at all
# stay NaN in every column.
df["country"].str.split(",", expand=True).head(10)
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | United States | None | None | None | None | None | None | None | None | None | None | None |
| 1 | South Africa | None | None | None | None | None | None | None | None | None | None | None |
| 2 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | India | None | None | None | None | None | None | None | None | None | None | None |
| 5 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7 | United States | Ghana | Burkina Faso | United Kingdom | Germany | Ethiopia | None | None | None | None | None | None |
| 8 | United Kingdom | None | None | None | None | None | None | None | None | None | None | None |
| 9 | United States | None | None | None | None | None | None | None | None | None | None | None |
# Build a wide frame: every original column plus one integer-labelled column
# per comma-separated entry of "country". pd.concat returns a new frame, so
# df itself is left untouched.
country = pd.concat(
    [df, df["country"].str.split(",", expand=True)],
    axis=1,
)
# Lift the column display limit so all of the added columns are shown.
pd.set_option('display.max_columns', None)
country.head(10)
| show_id | type | title | director | cast | country | date_added | release_year | rating | duration | listed_in | description | date_added_month | date_added_day | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | s1 | Movie | Dick Johnson Is Dead | Kirsten Johnson | NaN | United States | 2021-09-25 | 2020 | PG-13 | 90 min | Documentaries | As her father nears the end of his life, filmm... | 9.0 | 25.0 | United States | None | None | None | None | None | None | None | None | None | None | None |
| 1 | s2 | TV Show | Blood & Water | NaN | Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban... | South Africa | 2021-09-24 | 2021 | TV-MA | 2 Seasons | International TV Shows, TV Dramas, TV Mysteries | After crossing paths at a party, a Cape Town t... | 9.0 | 24.0 | South Africa | None | None | None | None | None | None | None | None | None | None | None |
| 2 | s3 | TV Show | Ganglands | Julien Leclercq | Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi... | NaN | 2021-09-24 | 2021 | TV-MA | 1 Season | Crime TV Shows, International TV Shows, TV Act... | To protect his family from a powerful drug lor... | 9.0 | 24.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | s4 | TV Show | Jailbirds New Orleans | NaN | NaN | NaN | 2021-09-24 | 2021 | TV-MA | 1 Season | Docuseries, Reality TV | Feuds, flirtations and toilet talk go down amo... | 9.0 | 24.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | s5 | TV Show | Kota Factory | NaN | Mayur More, Jitendra Kumar, Ranjan Raj, Alam K... | India | 2021-09-24 | 2021 | TV-MA | 2 Seasons | International TV Shows, Romantic TV Shows, TV ... | In a city of coaching centers known to train I... | 9.0 | 24.0 | India | None | None | None | None | None | None | None | None | None | None | None |
| 5 | s6 | TV Show | Midnight Mass | Mike Flanagan | Kate Siegel, Zach Gilford, Hamish Linklater, H... | NaN | 2021-09-24 | 2021 | TV-MA | 1 Season | TV Dramas, TV Horror, TV Mysteries | The arrival of a charismatic young priest brin... | 9.0 | 24.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 6 | s7 | Movie | My Little Pony: A New Generation | Robert Cullen, José Luis Ucha | Vanessa Hudgens, Kimiko Glenn, James Marsden, ... | NaN | 2021-09-24 | 2021 | PG | 91 min | Children & Family Movies | Equestria's divided. But a bright-eyed hero be... | 9.0 | 24.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 7 | s8 | Movie | Sankofa | Haile Gerima | Kofi Ghanaba, Oyafunmike Ogunlano, Alexandra D... | United States, Ghana, Burkina Faso, United Kin... | 2021-09-24 | 1993 | TV-MA | 125 min | Dramas, Independent Movies, International Movies | On a photo shoot in Ghana, an American model s... | 9.0 | 24.0 | United States | Ghana | Burkina Faso | United Kingdom | Germany | Ethiopia | None | None | None | None | None | None |
| 8 | s9 | TV Show | The Great British Baking Show | Andy Devonshire | Mel Giedroyc, Sue Perkins, Mary Berry, Paul Ho... | United Kingdom | 2021-09-24 | 2021 | TV-14 | 9 Seasons | British TV Shows, Reality TV | A talented batch of amateur bakers face off in... | 9.0 | 24.0 | United Kingdom | None | None | None | None | None | None | None | None | None | None | None |
| 9 | s10 | Movie | The Starling | Theodore Melfi | Melissa McCarthy, Chris O'Dowd, Kevin Kline, T... | United States | 2021-09-24 | 2021 | PG-13 | 104 min | Comedies, Dramas | A woman adjusting to life after a loss contend... | 9.0 | 24.0 | United States | None | None | None | None | None | None | None | None | None | None | None |
# Let's change it from wide to long format: one row per (title, country slot).
# The split columns are the integer-labelled ones; select them by label type
# instead of hard-coding their number, so this keeps working (and drops
# nothing) when the longest country list is not exactly 12 entries.
split_columns = [col for col in country.columns if pd.api.types.is_integer(col)]
country = pd.melt(country, id_vars=['type','title'], value_vars=split_columns)
country
| type | title | variable | value | |
|---|---|---|---|---|
| 0 | Movie | Dick Johnson Is Dead | 0 | United States |
| 1 | TV Show | Blood & Water | 0 | South Africa |
| 2 | TV Show | Ganglands | 0 | NaN |
| 3 | TV Show | Jailbirds New Orleans | 0 | NaN |
| 4 | TV Show | Kota Factory | 0 | India |
| ... | ... | ... | ... | ... |
| 105679 | Movie | Zodiac | 11 | None |
| 105680 | TV Show | Zombie Dumb | 11 | NaN |
| 105681 | Movie | Zombieland | 11 | None |
| 105682 | Movie | Zoom | 11 | None |
| 105683 | Movie | Zubaan | 11 | None |
105684 rows × 4 columns
# Without NA values: keep only the rows that actually hold a country.
# .copy() makes the result an independent frame, so the column assignment
# below does not raise SettingWithCopyWarning.
country = country[country["value"].notna()].copy()
px.histogram(country, x="value")
# Splitting on "," leaves a leading space on every country after the first;
# strip it so that e.g. " Ghana" and "Ghana" are counted together.
country["value"] = country["value"].str.strip()
C:\Users\Thinkpad\AppData\Local\Temp\ipykernel_812\3566639422.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# Titles per country, with bars split by type (Movie / TV Show).
px.histogram(country, x="value",color="type")
# Same chart, with countries sorted by total count in descending order.
px.histogram(country, x="value", color="type").update_xaxes(categoryorder="total descending")
# Inspect the rows attributed to a single country to sanity-check the reshaping.
country[country["value"] == "Egypt"]
| type | title | variable | value | |
|---|---|---|---|---|
| 457 | Movie | Lift Like a Girl | 0 | Egypt |
| 529 | Movie | Return of the Prodigal Son | 0 | Egypt |
| 859 | Movie | Asmaa | 0 | Egypt |
| 991 | Movie | One Like It | 0 | Egypt |
| 1137 | Movie | The Knight and the Princess | 0 | Egypt |
| ... | ... | ... | ... | ... |
| 11161 | Movie | Destiny | 1 | Egypt |
| 11173 | Movie | Alexandria: Again and Forever | 1 | Egypt |
| 11267 | Movie | The Other | 1 | Egypt |
| 14742 | Movie | The Square | 1 | Egypt |
| 22517 | TV Show | Black Crows | 2 | Egypt |
117 rows × 4 columns
# Spot-check one of those rows against the raw data: "Black Crows" lists
# Egypt as the third of its comma-separated countries.
df[df["title"] == "Black Crows"]
| show_id | type | title | director | cast | country | date_added | release_year | rating | duration | listed_in | description | date_added_month | date_added_day | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4903 | s4904 | TV Show | Black Crows | NaN | Rashed Al Shamrani, Sayed Rajab, Dina Talaat, ... | Saudi Arabia, Syria, Egypt, Lebanon, Kuwait | 2018-04-30 | 2017 | TV-14 | 1 Season | International TV Shows, TV Dramas | This drama portrays women and kids living unde... | 4.0 | 30.0 |
# Build a wide frame for genres: every original column plus one
# integer-labelled column per comma-separated entry of "listed_in".
# pd.concat returns a new frame, so df itself is left untouched.
genre = pd.concat(
    [df, df["listed_in"].str.split(",", expand=True)],
    axis=1,
)
genre
| show_id | type | title | director | cast | country | date_added | release_year | rating | duration | listed_in | description | date_added_month | date_added_day | 0 | 1 | 2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | s1 | Movie | Dick Johnson Is Dead | Kirsten Johnson | NaN | United States | 2021-09-25 | 2020 | PG-13 | 90 min | Documentaries | As her father nears the end of his life, filmm... | 9.0 | 25.0 | Documentaries | None | None |
| 1 | s2 | TV Show | Blood & Water | NaN | Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban... | South Africa | 2021-09-24 | 2021 | TV-MA | 2 Seasons | International TV Shows, TV Dramas, TV Mysteries | After crossing paths at a party, a Cape Town t... | 9.0 | 24.0 | International TV Shows | TV Dramas | TV Mysteries |
| 2 | s3 | TV Show | Ganglands | Julien Leclercq | Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi... | NaN | 2021-09-24 | 2021 | TV-MA | 1 Season | Crime TV Shows, International TV Shows, TV Act... | To protect his family from a powerful drug lor... | 9.0 | 24.0 | Crime TV Shows | International TV Shows | TV Action & Adventure |
| 3 | s4 | TV Show | Jailbirds New Orleans | NaN | NaN | NaN | 2021-09-24 | 2021 | TV-MA | 1 Season | Docuseries, Reality TV | Feuds, flirtations and toilet talk go down amo... | 9.0 | 24.0 | Docuseries | Reality TV | None |
| 4 | s5 | TV Show | Kota Factory | NaN | Mayur More, Jitendra Kumar, Ranjan Raj, Alam K... | India | 2021-09-24 | 2021 | TV-MA | 2 Seasons | International TV Shows, Romantic TV Shows, TV ... | In a city of coaching centers known to train I... | 9.0 | 24.0 | International TV Shows | Romantic TV Shows | TV Comedies |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 8802 | s8803 | Movie | Zodiac | David Fincher | Mark Ruffalo, Jake Gyllenhaal, Robert Downey J... | United States | 2019-11-20 | 2007 | R | 158 min | Cult Movies, Dramas, Thrillers | A political cartoonist, a crime reporter and a... | 11.0 | 20.0 | Cult Movies | Dramas | Thrillers |
| 8803 | s8804 | TV Show | Zombie Dumb | NaN | NaN | NaN | 2019-07-01 | 2018 | TV-Y7 | 2 Seasons | Kids' TV, Korean TV Shows, TV Comedies | While living alone in a spooky town, a young g... | 7.0 | 1.0 | Kids' TV | Korean TV Shows | TV Comedies |
| 8804 | s8805 | Movie | Zombieland | Ruben Fleischer | Jesse Eisenberg, Woody Harrelson, Emma Stone, ... | United States | 2019-11-01 | 2009 | R | 88 min | Comedies, Horror Movies | Looking to survive in a world taken over by zo... | 11.0 | 1.0 | Comedies | Horror Movies | None |
| 8805 | s8806 | Movie | Zoom | Peter Hewitt | Tim Allen, Courteney Cox, Chevy Chase, Kate Ma... | United States | 2020-01-11 | 2006 | PG | 88 min | Children & Family Movies, Comedies | Dragged from civilian life, a former superhero... | 1.0 | 11.0 | Children & Family Movies | Comedies | None |
| 8806 | s8807 | Movie | Zubaan | Mozez Singh | Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan... | India | 2019-03-02 | 2015 | TV-14 | 111 min | Dramas, International Movies, Music & Musicals | A scrappy but poor boy worms his way into a ty... | 3.0 | 2.0 | Dramas | International Movies | Music & Musicals |
8807 rows × 17 columns
# Reshape to long format: one row per (title, genre slot). The split columns
# are the integer-labelled ones; select them by label type rather than
# hard-coding their number, so no genre is dropped if a title lists more
# than three.
genre_columns = [col for col in genre.columns if pd.api.types.is_integer(col)]
genre = pd.melt(genre, id_vars=["type","title"], value_vars=genre_columns)
# Drop the None padding of titles with fewer genres. .copy() yields an
# independent frame, so assigning to its "value" column later does not raise
# SettingWithCopyWarning.
genre = genre[genre["value"].notna()].copy()
genre
| type | title | variable | value | |
|---|---|---|---|---|
| 0 | Movie | Dick Johnson Is Dead | 0 | Documentaries |
| 1 | TV Show | Blood & Water | 0 | International TV Shows |
| 2 | TV Show | Ganglands | 0 | Crime TV Shows |
| 3 | TV Show | Jailbirds New Orleans | 0 | Docuseries |
| 4 | TV Show | Kota Factory | 0 | International TV Shows |
| ... | ... | ... | ... | ... |
| 26414 | TV Show | Zindagi Gulzar Hai | 2 | TV Dramas |
| 26415 | Movie | Zinzana | 2 | Thrillers |
| 26416 | Movie | Zodiac | 2 | Thrillers |
| 26417 | TV Show | Zombie Dumb | 2 | TV Comedies |
| 26420 | Movie | Zubaan | 2 | Music & Musicals |
19323 rows × 4 columns
# Splitting "listed_in" on "," leaves a leading space on every genre after
# the first; strip it so that e.g. " TV Dramas" and "TV Dramas" are one category.
genre["value"] = genre["value"].str.strip()
# Titles per genre, split by type and sorted by total count in descending order.
px.histogram(genre, x="value", color="type").update_xaxes(categoryorder="total descending")